This notebook walks through how I put together our tables for the MySQL portion of the course.
import pymysql
# Connect to the database
connection = pymysql.connect(host='',
cursorclass=pymysql.cursors.DictCursor)
cursor = connection.cursor()
Generally a good first step for setting up a database is to remove everything that is already there. This avoids any potential conflicts on creations/inserts.
Therefore we first DROP all of the tables we'll be using if they exist.
"""Drop tables"""
tables = ['friends', 'favorites', 'retweets', 'statuses', 'users']
for table in tables:
print(table)
drop_query = f"""DROP TABLE IF EXISTS {table};"""
cursor.execute(drop_query)
connection.commit()
Once we've cleared things out, we can start creating the tables we'll be using.
We create a table by calling CREATE TABLE table_name(colls). One thing to keep in mind is the use of PRIMARY KEY(coll), this simply identifies the column that will be used as the primary key.
"""Create users table to track twitter users"""
make_user_table = """CREATE TABLE users(
created_date DATE,
description VARCHAR(255),
favorites_count INT,
friends_count INT,
user_id VARCHAR(255),
listed_count INT,
location VARCHAR(255),
name VARCHAR(255),
screen_name VARCHAR(255),
statuses_count INT,
url VARCHAR(255),
verified BOOL,
PRIMARY KEY(user_id));"""
cursor.execute(make_user_table)
connection.commit()
All subsequent table creation queries will look relatively the same, the key difference is the FOREIGN KEY(coll) statement. This statement defines a relationship to another table (which is a hard rule - no insertions that don't meet constraints).
Notes:
"""Drop and create friends table to track twitter friends"""
make_friends_table = """CREATE TABLE friends(
user_id VARCHAR(255),
user_screen_name VARCHAR(255),
friend_id VARCHAR(255),
friend_screen_name VARCHAR(255),
PRIMARY KEY (user_id, friend_id),
FOREIGN KEY (user_id)
REFERENCES users(user_id)
ON DELETE CASCADE,
FOREIGN KEY (friend_id)
REFERENCES users(user_id)
ON DELETE CASCADE);"""
cursor.execute(make_friends_table)
connection.commit()
Now we can simply create the remaining tables:
"""Drop and create friends table to track twitter friends"""
make_status_table = """CREATE TABLE statuses(
created_date DATE,
favorites_count INT,
status_id VARCHAR(255),
lang VARCHAR(255),
retweet_count INT,
source VARCHAR(255),
text TEXT,
truncated BOOL,
user_id VARCHAR(255),
PRIMARY KEY (status_id),
FOREIGN KEY (user_id)
REFERENCES users(user_id)
ON DELETE CASCADE);"""
cursor.execute(make_status_table)
connection.commit()
"""Drop and create favorites table to track favorited tweets"""
make_favs_table = """CREATE TABLE favorites(
status_id VARCHAR(255),
user_id VARCHAR(255),
user_screen_name VARCHAR(255),
PRIMARY KEY (status_id, user_id),
FOREIGN KEY (user_id)
REFERENCES users(user_id)
ON DELETE CASCADE,
FOREIGN KEY (status_id)
REFERENCES statuses(status_id)
ON DELETE CASCADE);"""
cursor.execute(make_favs_table)
connection.commit()
"""Drop and create favorites table to track favorited tweets"""
make_retweets_table = """CREATE TABLE retweets(
created_date DATE,
status_id VARCHAR(255),
retweeted_status VARCHAR(255),
lang VARCHAR(255),
retweet_count INT,
source VARCHAR(255),
text TEXT,
user_id VARCHAR(255),
PRIMARY KEY (status_id),
FOREIGN KEY (user_id)
REFERENCES users(user_id)
ON DELETE CASCADE,
FOREIGN KEY (retweeted_status)
REFERENCES statuses(status_id)
ON DELETE CASCADE);"""
cursor.execute(make_retweets_table)
connection.commit()